#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os,sys
import Queue
import threading
import urllib2
from readability.readability import Document

# Python 2-only workaround: make UTF-8 the interpreter-wide default codec
# used for implicit str <-> unicode conversions.
# reload(sys) must come first: per the Python 2 `sys` documentation,
# setdefaultencoding is removed from the sys namespace once the site
# module has run, and reloading the module makes it reachable again.
# Keep these two statements in this order.
reload(sys)
sys.setdefaultencoding('utf-8')

class BatchGetFullText(threading.Thread):
    """Worker thread that downloads pages and extracts their full text.

    Each worker repeatedly takes a URL from ``queue``, downloads it, passes
    the body through ``Document(html).summary()`` and puts a
    ``(url, summary)`` tuple on ``out_queue``.

    ``run`` loops forever, so the thread never finishes on its own; start
    it as a daemon thread and wait for completion with ``queue.join()``.

    Arguments:
        queue: queue of URL strings to process (needs ``get``/``task_done``).
        out_queue: queue receiving ``(url, summary)`` tuples for every URL
            that was processed successfully.
        timeout: timeout passed to ``urllib2.urlopen`` for each request.
    """
    def __init__(self, queue, out_queue, timeout):
        threading.Thread.__init__(self)
        self.queue = queue
        self.out_queue = out_queue
        self.timeout = timeout

    def run(self):
        """Process URLs from the input queue until the process exits."""
        while True:
            # Blocks until a URL is available.
            url = self.queue.get()

            try:
                response = urllib2.urlopen(url, timeout=self.timeout)
                try:
                    html = response.read()
                finally:
                    # Release the connection even if read() fails.
                    response.close()
                readable_article = Document(html).summary()
                # Hand the result over to the consumer of out_queue.
                self.out_queue.put((url, readable_article))
            except IOError:
                # Best effort: URLs that cannot be fetched are skipped.
                pass
            except Exception as exc:
                # Anything else (malformed URL, parser failure, ...) must
                # not kill the worker; report it and move on.
                sys.stderr.write(
                    "BatchGetFullText: skipping %r: %r\n" % (url, exc))
            finally:
                # Always acknowledge the item, otherwise queue.join() in
                # the producer would block forever after a failure.
                self.queue.task_done()


class GetFullText(object):
    """Fetch a single page and return its extracted full text.

    Arguments:
        timeout: timeout passed to ``urllib2.urlopen`` for the request.
    """
    def __init__(self, timeout):
        self.timeout = timeout

    def fetch(self, url):
        """Download ``url`` and return ``Document(html).summary()``.

        Returns an empty string when the download (or any later step)
        raises ``IOError``; other exceptions propagate to the caller.
        """
        try:
            response = urllib2.urlopen(url, timeout=self.timeout)
            try:
                html = response.read()
            finally:
                # Release the connection even if read() fails.
                response.close()
            return Document(html).summary()
        except IOError:
            # Best effort: an unreachable page yields empty output.
            return ""


if __name__ == "__main__":
    # Command-line entry point: fetch the URL given as the first argument
    # and print the result of GetFullText.fetch for it.

    # Indexing sys.argv[1] directly raises IndexError when no argument is
    # supplied; treat a missing argument like an empty URL instead.
    fetchingUrl = sys.argv[1] if len(sys.argv) > 1 else ""

    if not fetchingUrl:
        # Nothing to fetch: emit an empty line.
        print("")
    else:
        processor = GetFullText(10)
        print(processor.fetch(fetchingUrl))

    # queue = Queue.Queue()
    # out_queue = Queue.Queue()
    #
    # for i in range(5):
    #     t = BatchGetFullText(queue, out_queue, 10)
    #     t.setDaemon(True)
    #     t.start()
    #
    # urls = ["http://coolshell.cn/articles/10975.html"]
    #
    # # put url in threads pool
    # for url in urls:
    #     queue.put(url)
    #
    # # wait threads
    # queue.join()
    #
    # while not out_queue.empty():
    #     url, content = out_queue.get()
    #     print url, content
    #     print